require(plyr)
require(dplyr)
require(tidyr)
require(lmtest)
require(stargazer)
require(sandwich)
require(ggmap)
require(splines)
require(broom)
require(data.table)
require(dotwhisker)
require(gdata)

# Set working directory
setwd("C:/Users/Eric/Dropbox/Racial discourse/io_replication/")

#### IMPORT MAIN DATA ####

d = fread("./data/tropeData.csv")

# Redefine geographic regions as factors
d$geo_region = factor(d$geo_region, 
                       levels = c("Western Europe", 
                                  "Americas",
                                  "Eastern Europe", 
                                  "Middle East/Northern Africa", 
                                  "Sub-Saharan Africa", 
                                  "Asia",  
                                  "Oceania"))
d$geo_region2 = factor(d$geo_region2, 
                        levels = c("Western Europe", 
                                   "Americas",
                                   "Eastern Europe", 
                                   "Middle East/Northern Africa", 
                                   "Sub-Saharan Africa", 
                                   "Asia",  
                                   "Oceania",
                                   "Vietnam", "USSR", "South Africa"))


#### Table 3: Example PDB entries with high predicted probability of exhibiting key themes ####
# Infantilization
d[grep("Brown reports Phoumi may now be trying", d$text), c("date", "pdbID", "predINF")][1] 

# Animal analogy
d[grep("Anti-Chinese feeling is rising", d$text), c("date", "pdbID", "predANIM")][1]

# Belligerence
d[grep("Kenyan Government is adopting an increasingly", d$text), c("date", "pdbID", "predBELLIG")][1]

# Irrationality
d[grep("We are expecting some anti-American demonstrations", d$text), c("date", "pdbID", "predIRR")][1]


#### Figure 1: Histograms of dependent variables ####

#### Figure 1(a): Predicted probabilities from supervised learning models ####
d |> distinct(entryID, .keep_all = T) |>
  dplyr::select(entryID, predINF, predANIM, predBELLIG, predIRR) |>
  pivot_longer(starts_with("pred"), names_to="trope", values_to="prob") |>
  mutate(trope=factor(trope, levels=c("predINF", "predANIM", "predBELLIG", "predIRR"),
                      labels=c("Infantilization", "Animal analogy", "Belligerence", "Irrationality"))) |>
  ggplot(aes(prob)) + geom_histogram(fill="gray30") + facet_wrap(~trope) +
  theme_bw() + xlab("Probability") + ylab("Entries")

ggsave("./figures/tropePredHistograms.pdf", width=4.5, height=3.5)


#### Figure 1(b): Counts from synonym-based dictionaries ####
d |> distinct(entryID, .keep_all = T) |>
  dplyr::select(entryID, countINF, countANIM, countBELLIG, countIRR) |>
  pivot_longer(starts_with("count"), names_to="trope", values_to="count") |>
  mutate(trope = factor(trope, levels=c("countINF", "countANIM", "countBELLIG", "countIRR"),
                        labels=c("Infantilization", "Animal analogy", "Belligerence", "Irrationality"))) |>
  ggplot(aes(count)) + geom_histogram(fill="gray30", binwidth = 1) + facet_wrap(~trope) +
  theme_bw() + xlab("Count") + ylab("Entries") + coord_cartesian(xlim=c(-0.5,5.5))

ggsave("./figures/tropeCountHistograms.pdf", width=4.5, height=3.5)


#### Figure 2: Depictions of explanatory variables ####

# Set up world map #

# Import country/region data
careg = fread("./data/regionData.csv")
cakey = careg |> dplyr::select(mapname, globalsouth, geo_region) |> filter(mapname!="")

# Make the map object
mapdata = map_data("world")
mapmerge = left_join(mapdata, cakey, by=c("region"="mapname"))
mapmerge = mapmerge |> drop_na(region) |>
  filter(region!="Antarctica") |>
  distinct(order, .keep_all = T) |> arrange(order) 

# Reassign Greenland as Western Europe and treat United States as NA for plot
mapmerge$globalsouth[which(mapmerge$region=="Greenland")] = 0
mapmerge$geo_region[which(mapmerge$region=="Greenland")] = "Western Europe"
mapmerge$geo_region[which(mapmerge$region=="USA")] = NA
mapmerge$geo_region = factor(mapmerge$geo_region, 
                             levels = c("Western Europe", 
                                        "Americas",
                                        "Eastern Europe", 
                                        "Middle East/Northern Africa", 
                                        "Sub-Saharan Africa", 
                                        "Asia",  
                                        "Oceania"))

# Import independence data created in addFeatures.R file 
ysiData = fread("./data/ysiData.csv")
ccsplit = strsplit(ysiData$ccyear, " ")
ysiData$ccode = as.numeric(sapply(ccsplit, "[[", 1))
ysiData$year = sapply(ccsplit, "[[", 2)
ysid = ysiData |> filter(year==1977) |> dplyr::select(-ccyear, -year, -decol)

# Extract only relevant variables from country/region data to make this map
cakey2 = careg |> dplyr::select(mapname, ccode) |> filter(mapname!="")

# Make map object to plot years since independence
mapdata = map_data("world")
mapmergeC = left_join(mapdata, cakey2, by=c("region"="mapname"))
mapmergeC = left_join(mapmergeC, ysid, by=c("ccode"))


#### Figure 2(a): Map of the "Global South" (Group of 77 and China) ####
mapGS = distinct(mapmerge, .keep_all=T) |>
  drop_na(globalsouth) |>
  ggplot(aes(map_id=region, fill=factor(globalsouth))) + 
  geom_map(map=mapmerge) + 
  theme_bw() + 
  theme(axis.text.x=element_blank(),
        axis.text.y=element_blank()) +
  geom_polygon(data=mapmerge, aes(x=long, y=lat, group=group), colour="black", linewidth=0.1) +
  scale_fill_manual("Global\nSouth", values=c("gray80", "gray50"), labels=c("No", "Yes")) + xlab("") + ylab("") +
  ylim(-55, 85) +
  theme(legend.position = "bottom")
mapGS
ggsave("./figures/mapGlobalSouth.pdf", plot=mapGS, width=7, height=4.25)


#### Figure 2(b): Years since independence in 1977 ####
mapYSI = distinct(mapmergeC, region, .keep_all=T) |> 
  ggplot(aes(map_id=region, fill=log(ysi))) + 
  geom_map(map=mapmergeC) + 
  theme_bw() + 
  theme(axis.text.x=element_blank(),
        axis.text.y=element_blank()) +
  geom_polygon(data=mapmergeC, aes(x=long, y=lat, group=group), colour="black", linewidth=0.1) +
  xlab("") + ylab("") +
  ylim(-55, 85) +
  scale_fill_gradient("Years\nsince\nindep.", 
                      low="gray60", high="gray10", na.value = "gray95",
                      breaks=c(0, 1, 2, 3, 4),
                      labels=c(0, 2, 6, 19, 54)) +
  theme(legend.position = "bottom")
mapYSI
ggsave("./figures/mapYSI.pdf", plot=mapYSI, width=7, height=4.5)


#### REGRESSION MODELS ####

#### Standard models ####

# Define variables and labels
controls1 = "+ conf_high + 
                   democracy + 
                   personal + 
                   leaderMention + 
                   UStrade + 
                   USmilaid + 
                   USdefense + 
                   logWords + factor(country)"
varLabels1 = c("Global South", "Years since independence", 
               "Conflict", 
               "Democracy", 
               "Personalism", 
               "Leader mention", 
               "US trade", 
               "US military aid", 
               "US defense", 
               "Entry length")

theIV = "~ globalsouth + logYSI"
predControls = controls1
countControls = controls1
varLabels = varLabels1
dataset="d"
showPreds = TRUE

# Run regressions and store results
source("./code/doRegressions.R")
modelsPlain = outList


#### Table 4: Results of regressions ####

# Standard estimates (not shown in text)
# Note that the number of observations in each model appears in this code.
stargazer(modelsPlain[1:8],
          column.sep.width = "-20pt", no.space=T, align=T, df=F,
          omit=c("Topic", "country", "admin", "date"),
          omit.stat = c("ll", "rsq", "adj.rsq", "aic", "f", "ser"),
          covariate.labels = varLabels)

# Estimates with clustered standard errors (shown in text)
# Note that the number of observations in each model do not appear in this code.
# Number of observations in each model are reported in the standard estimates above.
stargazer(modelsPlain[9:16],
          column.sep.width = "-20pt", no.space=T, align=T, df=F,
          omit=c("Topic", "country", "admin", "date"),
          omit.stat = c("ll", "rsq", "adj.rsq", "aic", "f", "ser"),
          covariate.labels = varLabels)


#### Table 5: Predicted values from models in Table 4 ####
createPredictions(modelsPlain)

# Mean values in the table
d |> dplyr::select(starts_with("pred")|starts_with("count")) |>
  summarize_if(is.numeric, mean) |>
  round(3)


#### Models that add STM results ####

# Define variables and labels
controls1a = paste0(controls1, "+ factor(admin) + splines::bs(date, 3)")
keyTopics = paste("+", paste(paste0("Topic", c(1,2,3,4,5,6,8,9,
                                               11,13,14,17,
                                               20,21,22,24,26,28,
                                               38,
                                               41,43,44,45,46,47,
                                               50,52,53,54,55,59,
                                               60,61,62,64)), collapse=" + "))
controlsSTM = paste0(controls1, keyTopics)
controlsSTMa = paste0(controls1a, keyTopics)

theIV = "~ globalsouth + logYSI"
predControls = controlsSTM
countControls = controlsSTM
varLabels = varLabels1
dataset="d"
showPreds = FALSE

# Run regressions and store results
source("./code/doRegressions.R")
modelsSTM = outList


#### Figure 3: Coefficient plots of regressions in Table 4 and additional models that include topic prevalence measures ####

# Tidied data from quasibinomial models
coef_p_inf_c = tidy(modelsPlain$p_inf_c) |> filter(!grepl("Topic|country|Intercept|admin|date", term))
coef_p_anim_c = tidy(modelsPlain$p_anim_c) |> filter(!grepl("Topic|country|Intercept|admin|date", term))
coef_p_bel_c = tidy(modelsPlain$p_bel_c) |> filter(!grepl("Topic|country|Intercept|admin|date", term))
coef_p_irr_c = tidy(modelsPlain$p_irr_c) |> filter(!grepl("Topic|country|Intercept|admin|date", term))

# Tidied data from Poisson models
coef_c_inf_c = tidy(modelsPlain$c_inf_c) |> filter(!grepl("Topic|country|Intercept|admin|date", term))
coef_c_anim_c = tidy(modelsPlain$c_anim_c) |> filter(!grepl("Topic|country|Intercept|admin|date", term))
coef_c_bel_c = tidy(modelsPlain$c_bel_c) |> filter(!grepl("Topic|country|Intercept|admin|date", term))
coef_c_irr_c = tidy(modelsPlain$c_irr_c) |> filter(!grepl("Topic|country|Intercept|admin|date", term))

# Tidied data from quasibinomial models with STM
coef_p_inf_cs = tidy(modelsSTM$p_inf_c) |> filter(!grepl("Topic|country|Intercept|admin|date", term))
coef_p_anim_cs = tidy(modelsSTM$p_anim_c) |> filter(!grepl("Topic|country|Intercept|admin|date", term))
coef_p_bel_cs = tidy(modelsSTM$p_bel_c) |> filter(!grepl("Topic|country|Intercept|admin|date", term))
coef_p_irr_cs = tidy(modelsSTM$p_irr_c) |> filter(!grepl("Topic|country|Intercept|admin|date", term))

# Tidied data from Poisson models with STM
coef_c_inf_cs = tidy(modelsSTM$c_inf_c) |> filter(!grepl("Topic|country|Intercept|admin|date", term))
coef_c_anim_cs = tidy(modelsSTM$c_anim_c) |> filter(!grepl("Topic|country|Intercept|admin|date", term))
coef_c_bel_cs = tidy(modelsSTM$c_bel_c) |> filter(!grepl("Topic|country|Intercept|admin|date", term))
coef_c_irr_cs = tidy(modelsSTM$c_irr_c) |> filter(!grepl("Topic|country|Intercept|admin|date", term))

# Put all together
allcoefs = rbind(coef_p_inf_cs, coef_p_anim_cs, coef_p_bel_cs, coef_p_irr_cs,
                 coef_c_inf_cs, coef_c_anim_cs, coef_c_bel_cs, coef_c_irr_cs,
                 coef_p_inf_c, coef_p_anim_c, coef_p_bel_c, coef_p_irr_c,
                 coef_c_inf_c, coef_c_anim_c, coef_c_bel_c, coef_c_irr_c)
allcoefs$model = rep(c("Probabilities + Topics\n(Quasibinomial)", "Counts + Topics\n(Poisson)", "Probabilities\n(Quasibinomial)", "Counts\n(Poisson)"), each=40)
allcoefs$model = factor(allcoefs$model, levels=c( "Counts + Topics\n(Poisson)", "Probabilities + Topics\n(Quasibinomial)", "Counts\n(Poisson)", "Probabilities\n(Quasibinomial)"))
allcoefs$trope = rep(rep(c("Infantilization", "Animal Analogies", "Belligerence", "Irrationality"), each=10), 4)
allcoefs$trope = factor(allcoefs$trope, levels=c("Infantilization", "Animal Analogies", "Belligerence", "Irrationality"))
allcoefs = allcoefs |> filter(grepl("globalsouth|logYSI", term))

# Relabel variables for plot
allcoefs = allcoefs |> relabel_predictors(c(globalsouth="Global South",
                                             logYSI="Years since\nindependence",
                                             conf_high="Conflict",
                                             democracy="Democracy",
                                             personal="Personalism",
                                             leaderMention="Leader mention",
                                             UStrade="US trade",
                                             USmilaid="US military aid",
                                             USdefense="US defense",
                                             logWords="Entry length"))

# Variable for 95% statistical significance
allcoefs$signif = ifelse(allcoefs$p.value < 0.05, "Yes", "No")

# Make plot
ggplot(allcoefs, aes(fct_rev(term), estimate)) + geom_hline(yintercept = 0, linetype=2, color="gray40", linewidth=0.3) + 
  geom_pointrange(aes(color=model, y=estimate, ymin=estimate-1.96*std.error,
                      ymax=estimate+1.96*std.error, shape=signif), position = position_dodge(0.8), size=0.3) + 
  coord_flip(ylim=c(-1.5,5.5)) + theme_bw() +
  scale_shape_manual("95% Significant", values=c(16, 15)) + 
  scale_color_manual("Model", guide=guide_legend(reverse=TRUE), values=c("gray70", "gray50", "gray30", "gray10")) + 
  ylab("Estimate") + xlab("") +
  theme(legend.position = "bottom") + facet_grid(~trope) + guides(shape="none")
ggsave("./figures/coefPlotsShort.pdf", width=8.75, height=3.5)


#### Figure 4: Map of geographical regions ####
mapRegion = distinct(mapmerge, region, .keep_all=T) |>
  drop_na(geo_region) |>
  ggplot(aes(map_id=region, fill=geo_region)) + 
  geom_map(map=mapmerge) + 
  theme_bw() + 
  theme(axis.text.x=element_blank(),
        axis.text.y=element_blank()) +
  geom_polygon(data=mapmerge, aes(x=long, y=lat, group=group), colour="black", linewidth=0.1) +
  scale_fill_discrete("Region") +
  xlab("") + ylab("") +
  ylim(-55, 85) +
  theme(legend.position = "bottom")
mapRegion
ggsave("./figures/mapRegions.pdf", plot=mapRegion, width=7, height=4.5)


#### Models using regions ####
varLabels2 = c("Americas",
               "Eastern Europe", 
               "Middle East/Northern Africa", 
               "Sub-Saharan Africa", 
               "Asia",  
               "Oceania",
               "Years since independence", "Conflict", "Democracy", "Personalism", 
               "Leader mention", "US trade", "US military aid", "US defense", "Entry length")

theIV = "~ geo_region + logYSI"
predControls = controlsSTM
countControls = controlsSTM
varLabels = varLabels2
dataset="d"
showPreds = FALSE

source("./code/doRegressions.R")
modelsRegion = outList


#### Figure 5: Coefficient plots of regressions disaggregating countries by region ####

# Tidied data from quasibinomial models
coef_p_inf_c = tidy(modelsRegion$p_inf_c) |> filter(!grepl("Topic|country|Intercept|admin|date", term))
coef_p_anim_c = tidy(modelsRegion$p_anim_c) |> filter(!grepl("Topic|country|Intercept|admin|date", term))
coef_p_bel_c = tidy(modelsRegion$p_bel_c) |> filter(!grepl("Topic|country|Intercept|admin|date", term))
coef_p_irr_c = tidy(modelsRegion$p_irr_c) |> filter(!grepl("Topic|country|Intercept|admin|date", term))

# Tidied data from Poisson models
coef_c_inf_c = tidy(modelsRegion$c_inf_c) |> filter(!grepl("Topic|country|Intercept|admin|date", term))
coef_c_anim_c = tidy(modelsRegion$c_anim_c) |> filter(!grepl("Topic|country|Intercept|admin|date", term))
coef_c_bel_c = tidy(modelsRegion$c_bel_c) |> filter(!grepl("Topic|country|Intercept|admin|date", term))
coef_c_irr_c = tidy(modelsRegion$c_irr_c) |> filter(!grepl("Topic|country|Intercept|admin|date", term))

# Put all together
allcoefs = rbind(coef_p_inf_c, coef_p_anim_c, coef_p_bel_c, coef_p_irr_c,
                 coef_c_inf_c, coef_c_anim_c, coef_c_bel_c, coef_c_irr_c)
allcoefs$model = rep(c("Probabilities\n(Quasibinomial)", "Counts\n(Poisson)"), each=nrow(allcoefs)/2)
allcoefs$model = factor(allcoefs$model, levels=c("Counts\n(Poisson)", "Probabilities\n(Quasibinomial)"))
allcoefs$trope = rep(rep(c("Infantilization", "Animal Analogies", "Belligerence", "Irrationality"), each=length(unique(allcoefs$term))), 2)
allcoefs$trope = factor(allcoefs$trope, levels=c("Infantilization", "Animal Analogies", "Belligerence", "Irrationality"))
allcoefs = allcoefs |> filter(grepl("region|logYSI", term))

# Relabel variables for plot
allcoefs = allcoefs |> relabel_predictors(c(geo_regionAmericas="Americas",
                                             geo_regionAsia="Asia",
                                             geo_regionOceania="Oceania",
                                             'geo_regionEastern Europe'="Eastern Europe",
                                             'geo_regionMiddle East/Northern Africa' = "Middle East/Northern Africa",
                                             'geo_regionSub-Saharan Africa'="Sub-Saharan Africa",                                             
                                             logYSI="Years since\nindependence",
                                             conf_high="Conflict",
                                             democracy="Democracy",
                                             personal="Personalism",
                                             leaderMention="Leader mention",
                                             UStrade="US trade",
                                             USmilaid="US military aid",
                                             USdefense="US defense",
                                             logWords="Entry length"))

# Variable for 95% statistical significance
allcoefs$signif = ifelse(allcoefs$p.value < 0.05, "Yes", "No")

# Reorganize
allcoefs$term = factor(allcoefs$term,
                       levels=c("Americas",
                                "Eastern Europe", 
                                "Middle East/Northern Africa", 
                                "Sub-Saharan Africa", 
                                "Asia",  
                                "Oceania",
                                "Years since\nindependence", "Conflict", "Democracy",
                                "Personalism", "Leader mention", "US trade",
                                "US military aid", "US defense", "Entry length"))

# Make plot
ggplot(allcoefs, aes(fct_rev(term), estimate)) + geom_hline(yintercept = 0, linetype=2, color="gray40", linewidth=0.3) + 
  geom_pointrange(aes(color=model, y=estimate, ymin=estimate-1.96*std.error,
                      ymax=estimate+1.96*std.error, shape=signif), position = position_dodge(0.5), size=0.3) + 
  coord_flip() + 
  theme_bw() +
  scale_shape_manual("95% Significant", values=c(16, 15)) + 
  scale_color_manual("Model", guide=guide_legend(reverse=TRUE), values=c("gray50", "gray10")) + 
  ylab("Estimate") + xlab("") +
  theme(legend.position = "bottom") + facet_grid(~trope, scales="free_x") + guides(shape="none")
ggsave("./figures/coefPlotsRegions.pdf", width=8.75, height=5)


#### ROLLING REGRESSIONS ####

# Define range of time
yrange = 6

# Years to consider
years = 1961:(1977-yrange)

# Do rolling regressions
timeListGS = timeListReg = regCountList = list()
for (k in 1:length(years)) {
  
  # Start and end years
  startyear = years[k]
  endyear = years[k] + yrange
  
  # Get the data from those years
  dds = d |> filter(year >= startyear & year <= endyear)
  
  # Main models
  theIV = "~ globalsouth + logYSI"
  predControls = controlsSTM
  countControls = controlsSTM
  varLabels = varLabels1
  dataset="dds"
  showCoefs = FALSE
  
  # Run models
  source("./code/doRegressions.R")
  modelsGS = outList
  
  # Gather results
  mp_inf = tidy(modelsGS[[9]]) |> mutate(model="Probabilities", trope="Infantilization")
  mp_anim = tidy(modelsGS[[10]]) |> mutate(model="Probabilities", trope="Animal analogy")
  mp_bel = tidy(modelsGS[[11]]) |> mutate(model="Probabilities", trope="Belligerence")
  mp_irr = tidy(modelsGS[[12]]) |> mutate(model="Probabilities", trope="Irrationality")
  
  mc_inf = tidy(modelsGS[[13]]) |> mutate(model="Counts", trope="Infantilization")
  mc_anim = tidy(modelsGS[[14]]) |> mutate(model="Counts", trope="Animal analogy")
  mc_bel = tidy(modelsGS[[15]]) |> mutate(model="Counts", trope="Belligerence")
  mc_irr = tidy(modelsGS[[16]]) |> mutate(model="Counts", trope="Irrationality")
  
  mall = bind_rows(mp_inf, mp_anim, mp_bel, mp_irr,
                   mc_inf, mc_anim, mc_bel, mc_irr)
  
  mall = mall |> filter(grepl("globalsouth|logYSI", term))
  mall = mall |> relabel_predictors(c(globalsouth="Global South",
                                       logYSI="Years since\nindependence"))
  mall$signif = ifelse(mall$p.value < 0.05, "Yes", "No")
  
  # Start and end years, as well as total number of entries in the time window
  mall = mall |> mutate(start=startyear, end=endyear, windowEntries = nrow(dds))
  
  # Store results
  timeListGS[[k]] = mall
  
  ## Next, models using regions 
  theIV = "~ geo_region + logYSI"
  predControls = controlsSTM
  countControls = controlsSTM
  varLabels = varLabels2
  dataset="dds"
  showCoefs = FALSE
  
  # Run models
  source("./code/doRegressions.R")
  modelsReg = outList
  
  # Gather results
  mp_inf = tidy(modelsReg[[9]]) |> mutate(model="Probabilities", trope="Infantilization")
  mp_anim = tidy(modelsReg[[10]]) |> mutate(model="Probabilities", trope="Animal analogy")
  mp_bel = tidy(modelsReg[[11]]) |> mutate(model="Probabilities", trope="Belligerence")
  mp_irr = tidy(modelsReg[[12]]) |> mutate(model="Probabilities", trope="Irrationality")
  
  mc_inf = tidy(modelsReg[[13]]) |> mutate(model="Counts", trope="Infantilization")
  mc_anim = tidy(modelsReg[[14]]) |> mutate(model="Counts", trope="Animal analogy")
  mc_bel = tidy(modelsReg[[15]]) |> mutate(model="Counts", trope="Belligerence")
  mc_irr = tidy(modelsReg[[16]]) |> mutate(model="Counts", trope="Irrationality")
  
  mall = bind_rows(mp_inf, mp_anim, mp_bel, mp_irr,
                   mc_inf, mc_anim, mc_bel, mc_irr)
  
  mall = mall |> filter(grepl("region", term))
  
  mall = mall |> relabel_predictors(c(geo_regionAmericas="Americas",
                                       geo_regionAsia="Asia",
                                       geo_regionOceania="Oceania",
                                       'geo_regionEastern Europe'="Eastern Europe",
                                       'geo_regionMiddle East/Northern Africa' = "Middle East/Northern Africa",
                                       'geo_regionSub-Saharan Africa'="Sub-Saharan Africa",                                             
                                       logYSI="Years since\nindependence",
                                       conf_high="Conflict",
                                       democracy="Democracy",
                                       personal="Personalism",
                                       leaderMention="Leader mention",
                                       UStrade="US trade",
                                       USmilaid="US military aid",
                                       USdefense="US defense",
                                       logWords="Entry length"))
  mall$signif = ifelse(mall$p.value < 0.05, "Yes", "No")
  
  mall$term = factor(mall$term, 
                     levels=c("Americas",
                              "Eastern Europe", 
                              "Middle East/Northern Africa", 
                              "Sub-Saharan Africa", 
                              "Asia",  
                              "Oceania",
                              "Years since\nindependence", "Conflict", "Democracy",
                              "Personalism", "Leader mention", "US trade",
                              "US military aid", "US defense", "Entry length"))
  
  # Start and end years, as well as total number of entries in the time window
  mall = mall |> mutate(start=startyear, end=endyear, windowEntries = nrow(dds))
  
  # Add in counts of entries for each region
  regCounts = data.frame(table(dds$geo_region))
  mall = merge(mall, regCounts, by.x="term", by.y="Var1", all.x=T)
  
  # Store results
  timeListReg[[k]] = mall
  
  # Message indicating what year has been completed
  message(years[k])
}

# Put data together
timedataGS = rbindlist(timeListGS)
timedataGS$trope = factor(timedataGS$trope, levels=c("Infantilization", "Animal analogy", "Belligerence", "Irrationality"))
timedataReg = rbindlist(timeListReg)
timedataReg$trope = factor(timedataReg$trope, levels=c("Infantilization", "Animal analogy", "Belligerence", "Irrationality"))


#### Figure 6: Coefficient estimates for the Global South variable, using a moving seven-year temporal window ####
timedataGS |> 
  filter(term!="Years since\nindependence") |>
  ggplot(aes(start+yrange/2, estimate)) + 
  geom_hline(yintercept=0, linetype=2) +
  geom_line(aes(color=fct_rev(model))) +
  geom_linerange(aes(ymin=estimate-1.96*std.error, ymax=estimate+1.96*std.error, color=fct_rev(model))) +
  facet_wrap(~trope, scales = "free_y") +
  ylab("Estimate") +
  theme_bw() + scale_x_continuous("Year", 
                                  breaks=c(1963, 1967, 1971, 1975), 
                                  minor_breaks = 1961:1977) +
  scale_color_manual("Model", values=c("red3", "steelblue"))

ggsave("./figures/overTimeGlobalSouth.pdf", width=7, height=4.5)